In [1]:
import math,random
import os
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
import quandl
from sklearn import preprocessing
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,StackingRegressor,VotingRegressor
from sklearn.linear_model import LinearRegression,SGDRegressor,BayesianRidge,ARDRegression,PassiveAggressiveRegressor,TheilSenRegressor
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR

warnings.filterwarnings('ignore')
In [2]:
# Ticker symbol; getStockData appends it to 'WIKI/' to build the Quandl dataset code.
stock = "MSFT"
# Number of trailing rows that PreprocessData holds out as the forecast window.
# NOTE(review): 251 presumably approximates one year of trading days — confirm.
daysToForecast = 251
In [3]:
def getStockData(stock):
    """Download the Quandl ``WIKI/<stock>`` dataset and return it.

    The API key is read from the ``QUANDL_API_KEY`` environment variable instead
    of being embedded in the notebook. When the variable is unset, quandl's
    existing configuration is left untouched, so the request uses whatever key
    (if any) was configured elsewhere.

    NOTE(review): a key was previously committed in this cell; it should be
    treated as leaked and rotated.

    Parameters
    ----------
    stock : str
        Ticker symbol appended to ``'WIKI/'`` to form the dataset code.

    Returns
    -------
    The object returned by ``quandl.get`` for that dataset.
    """
    api_key = os.environ.get("QUANDL_API_KEY")
    if api_key:
        quandl.ApiConfig.api_key = api_key
    return quandl.get('WIKI/' + stock)
In [4]:
def FormatDataForModel(dataArray):
    """Build the model's feature frame from adjusted price/volume columns.

    Parameters
    ----------
    dataArray : pandas.DataFrame
        Must contain 'Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close' and
        'Adj. Volume'. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns 'Adj. Close', 'HL_PCT', 'PCT_change' and
        'Adj. Volume', where

        * ``HL_PCT``     = (Adj. High - Adj. Close) / Adj. Close * 100
        * ``PCT_change`` = (Adj. Close - Adj. Open) / Adj. Open * 100

        and every missing value is replaced by the sentinel -99999.
    """
    # Explicit copy: the column assignments below must write to an independent
    # frame, not to a slice of the caller's data (avoids SettingWithCopyWarning).
    prices = dataArray[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']].copy()
    prices['HL_PCT'] = (prices['Adj. High'] - prices['Adj. Close']) / prices['Adj. Close'] * 100.0
    prices['PCT_change'] = (prices['Adj. Close'] - prices['Adj. Open']) / prices['Adj. Open'] * 100.0
    features = prices[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]
    # Non-inplace fillna returns a fresh frame instead of mutating a slice.
    return features.fillna(-99999)
In [5]:
def PreprocessData(mlData,daysToForecast):
    """Label, scale and split the feature frame into training and forecast parts.

    The label of each row is the 'Adj. Close' value ``forecast_out`` rows later,
    with ``forecast_out = ceil(0.12 * daysToForecast)``. All feature columns are
    standardised together with ``preprocessing.scale`` and the last
    ``daysToForecast`` rows are held out as the forecast window.

    Parameters
    ----------
    mlData : pandas.DataFrame
        Feature frame containing an 'Adj. Close' column. It is not modified.
    daysToForecast : int
        Number of trailing rows to hold out; must satisfy
        ``0 < daysToForecast < len(mlData)``.

    Returns
    -------
    list
        ``[X, y, X_data, forecastData]`` where ``X``/``y`` are the scaled
        features and labels of the training rows, ``X_data`` the scaled features
        of the held-out rows, and ``forecastData`` the held-out rows of the
        labelled frame.

    Raises
    ------
    ValueError
        If ``daysToForecast`` is not strictly between 0 and ``len(mlData)``.
        (With 0, ``X[-0:]`` would select every row and ``X[:-0]`` none.)
    """
    if not 0 < daysToForecast < len(mlData):
        raise ValueError(
            "daysToForecast must be between 1 and len(mlData) - 1, got %r" % (daysToForecast,)
        )

    forecast_col = 'Adj. Close'
    forecast_out = int(math.ceil(0.12*daysToForecast))

    # assign() returns a new frame, so the caller's mlData keeps its columns.
    labelled = mlData.assign(label=mlData[forecast_col].shift(-forecast_out))

    # The shift leaves NaN labels only in the last forecast_out rows; since
    # forecast_out <= daysToForecast they all fall in the held-out window, so
    # the training labels need no dropna.
    # `columns=` replaces the positional axis argument removed in pandas 2.0.
    X = preprocessing.scale(np.array(labelled.drop(columns=['label'])))
    X_data = X[-daysToForecast:]
    X = X[:-daysToForecast]

    forecastData = labelled[-daysToForecast:]
    trainData = labelled[:-daysToForecast]
    y = np.array(trainData['label'])
    return [X, y, X_data, forecastData]
In [6]:
def TrainAndPredict(model,X,y,X_data,test_size=0.2,random_state=None):
    """Fit ``model`` on a random train split, score it, and predict ``X_data``.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit``, ``score`` and ``predict``.
    X, y : array-like
        Features and labels of the training rows.
    X_data : array-like
        Features to predict after fitting.
    test_size : float, default 0.2
        Passed to ``train_test_split``; the default keeps the original 80/20 split.
    random_state : int or None, default None
        Passed to ``train_test_split``. ``None`` keeps the original unseeded
        behaviour; pass an integer to make the reported score reproducible.

    Returns
    -------
    tuple
        ``(accuracy, prediction)``: ``model.score`` on the test split (for
        scikit-learn regressors this is R^2, not a classification accuracy)
        and ``model.predict(X_data)``.

    NOTE(review): the split is shuffled, so for time-ordered rows the test
    split is interleaved with the training rows and the score is likely
    optimistic compared with a chronological hold-out.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model.fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    prediction = model.predict(X_data)
    return accuracy, prediction
In [7]:
def addPredictionToForecast(prediction,forecastData):
    """Pair the held-out closing prices with the model's predictions.

    Returns a new frame holding only the 'Adj. Close' column of
    ``forecastData`` (renamed to 'EOD') plus a 'prediction' column filled
    from ``prediction``.
    """
    closing = forecastData[['Adj. Close']].rename(columns={'Adj. Close':'EOD'})
    return closing.assign(prediction=prediction[:])
In [8]:
def GraphPredictions(forecastData,stock):
    """Display a plotly line chart of ``forecastData`` titled with ``stock``.

    The axes are labelled 'Time' and 'Price'.
    """
    layout = {'title': stock, 'xaxis_title': 'Time', 'yaxis_title': 'Price'}
    chart = px.line(forecastData)
    chart.update_layout(**layout)
    chart.show()
In [9]:
def GraphAllData(allData,forecastData,stock):
    """Display the 'Adj. Close' history together with the predictions.

    ``allData['Adj. Close']`` and ``forecastData['prediction']`` are placed
    side by side as columns of one frame, which is drawn as a plotly line
    chart titled with ``stock`` and labelled 'Time' / 'Price'.
    """
    history = allData['Adj. Close']
    predicted = forecastData['prediction']
    combined = pd.concat([history, predicted], axis=1, sort=False)

    chart = px.line(combined)
    chart.update_layout(title=stock, xaxis_title='Time', yaxis_title='Price')
    chart.show()
In [10]:
# Baseline run: download the price history, derive the feature frame, hold out
# the last `daysToForecast` rows, fit a linear regression on the remaining rows
# and attach its predictions to the held-out rows.
allData = getStockData(stock)
mlData = FormatDataForModel(allData)
X,y,X_data,forecastData = PreprocessData(mlData,daysToForecast)
model = LinearRegression()
accuracy,prediction=TrainAndPredict(model,X,y,X_data)
# From here on `forecastData` holds only the 'EOD' and 'prediction' columns.
forecastData = addPredictionToForecast(prediction,forecastData)
In [11]:
# `accuracy` is model.score on TrainAndPredict's random 20% test split
# (R^2 for LinearRegression), not a classification accuracy.
print(accuracy)
0.979048166298415
In [12]:
# Held-out closing price ('EOD') next to the model's prediction.
# Note: PreprocessData labels each row with the close forecast_out rows later,
# so each plotted prediction estimates a later row than the EOD value beside it.
GraphPredictions(forecastData,stock)
In [13]:
# Full 'Adj. Close' history with the predictions for the held-out rows overlaid.
GraphAllData(allData,forecastData,stock)
In [14]:
# Repeat the experiment with a hold-out window three times as long.
# `allData` from the baseline run is reused instead of being downloaded again:
# FormatDataForModel builds a new frame and the plotting helpers only read it,
# so the raw history is unchanged.
daysToForecast = 251*3
mlData = FormatDataForModel(allData)
X,y,X_data,forecastData = PreprocessData(mlData,daysToForecast)
model = LinearRegression()
accuracy,prediction=TrainAndPredict(model,X,y,X_data)
forecastData = addPredictionToForecast(prediction,forecastData)
In [15]:
# model.score on the random test split for the 251*3-row hold-out run.
print(accuracy)
0.9415952614031434
In [16]:
# 'EOD' vs. 'prediction' over the 251*3 held-out rows.
GraphPredictions(forecastData,stock)
In [17]:
# Full 'Adj. Close' history with the 251*3-row predictions overlaid.
GraphAllData(allData,forecastData,stock)
In [18]:
# Repeat the experiment with a hold-out window five times as long.
# `allData` from the baseline run is reused instead of being downloaded again:
# FormatDataForModel builds a new frame and the plotting helpers only read it,
# so the raw history is unchanged.
daysToForecast = 251*5
mlData = FormatDataForModel(allData)
X,y,X_data,forecastData = PreprocessData(mlData,daysToForecast)
model = LinearRegression()
accuracy,prediction=TrainAndPredict(model,X,y,X_data)
forecastData = addPredictionToForecast(prediction,forecastData)
In [19]:
# model.score on the random test split for the 251*5-row hold-out run.
print(accuracy)
0.8980255913433519
In [20]:
# 'EOD' vs. 'prediction' over the 251*5 held-out rows.
GraphPredictions(forecastData,stock)
In [21]:
# Full 'Adj. Close' history with the 251*5-row predictions overlaid.
GraphAllData(allData,forecastData,stock)